library(ggplot2)
library(ggthemes)
library(ggsignif)
library(data.table)
library(huxtable) ## Need flextable installed to export tables. 
library(modelsummary)
library(stringr)
library(gridExtra)
library(ggeffects)

setwd("~/Dropbox/Projects/Local_Party_Org/Audit Paper/PRQ Submission/Resubmit/Final Copy/Replication/")
all_df <- read.csv("data.csv")


tmp <- reshape2::melt(prop.table(table(all_df$Gender, all_df$replied,  all_df$party), c(1,3)))

dem_test <- prop.test(table(all_df$Gender[all_df$party=="Democrat"], all_df$replied[all_df$party=="Democrat"]))$conf.int
dem_test <-round(100*dem_test,2)
dem_test <- paste0("(",dem_test[1], ", ", dem_test[2], ")")

rep_test <- prop.test(table(all_df$Gender[all_df$party=="Republican"], all_df$replied[all_df$party=="Republican"]))$conf.int
rep_test <-round(100*rep_test,2)
rep_test <- paste0("(",rep_test[1], ", ", rep_test[2], ")")


# tmp$Var2 <- factor(tmp$Var2, levels=c(0, 1), labels=c("No", "Yes"))
tmp <- tmp[tmp$Var2==1,]


anno_data <- data.frame("annotation"=c(dem_test, rep_test), 
                        "Var3"=c("Democrat", "Republican"), 
                        "y_position" = c(by(tmp$value, tmp$Var3, max)) + .05,
                        "tip_length"= .1,
                        "xmin"=1, 
                        "xmax"=2)

p1 <- ggplot(tmp,
             aes(x=Var1,  y=value))  + geom_col(position=position_dodge2()) + 
  theme_minimal() + scale_y_continuous("Percent Responded", labels=scales::label_percent(1)) + 
  geom_signif(data=anno_data, 
              aes(annotations=annotation, xmin=xmin, xmax=xmax, 
                  y_position=y_position, tip_length=tip_length), manual=T, 
              inherit.aes = F) + 
  geom_text(aes(label=scales::percent(value)), vjust=0) + 
  facet_wrap(~Var3) + 
  labs(x="Gender") + ggtitle("Response Rate by Gender") + theme(plot.title = element_text(hjust=.5))


tmp <- reshape2::melt(prop.table(table(all_df$Race, all_df$replied,  all_df$party), c(1,3)))

dem_test <- prop.test(table(all_df$Race[all_df$party=="Democrat"], all_df$replied[all_df$party=="Democrat"]))$conf.int
dem_test <-round(100*dem_test,2)
dem_test <- paste0("(",dem_test[1], ", ", dem_test[2], ")")

rep_test <- prop.test(table(all_df$Race[all_df$party=="Republican"], all_df$replied[all_df$party=="Republican"]))$conf.int
rep_test <-round(100*rep_test,2)
rep_test <- paste0("(",rep_test[1], ", ", rep_test[2], ")")


# tmp$Var2 <- factor(tmp$Var2, levels=c(0, 1), labels=c("No", "Yes"))
tmp <- tmp[tmp$Var2==1,]


anno_data <- data.frame("annotation"=c(dem_test, rep_test), 
                        "Var3"=c("Democrat", "Republican"), 
                        "y_position" = c(by(tmp$value, tmp$Var3, max)) + .05,
                        "tip_length"= .1,
                        "xmin"=1, 
                        "xmax"=2)

p2 <- ggplot(tmp,
             aes(x=Var1,  y=value))  + geom_col(position=position_dodge2()) + 
  theme_minimal() + scale_y_continuous("Percent Responded", labels=scales::label_percent(1)) + 
  geom_signif(data=anno_data, 
              aes(annotations=annotation, xmin=xmin, xmax=xmax, 
                  y_position=y_position, tip_length=tip_length), manual=T, 
              inherit.aes = F) + 
  geom_text(aes(label=scales::percent(value)), vjust=0) + 
  facet_wrap(~Var3) + 
  labs(x="Race") + ggtitle("Response Rate by Race") + theme(plot.title = element_text(hjust=.5))


tmp <- reshape2::melt(prop.table(table(all_df$Class_Type, all_df$replied,  all_df$party), c(1,3)))

dem_test <- prop.test(table(all_df$Class_Type[all_df$party=="Democrat"], all_df$replied[all_df$party=="Democrat"]))$conf.int
dem_test <-round(100*dem_test,2)
dem_test <- paste0("(",dem_test[1], ", ", dem_test[2], ")")

rep_test <- prop.test(table(all_df$Class_Type[all_df$party=="Republican"], all_df$replied[all_df$party=="Republican"]))$conf.int
rep_test <-round(100*rep_test,2)
rep_test <- paste0("(",rep_test[1], ", ", rep_test[2], ")")


# tmp$Var2 <- factor(tmp$Var2, levels=c(0, 1), labels=c("No", "Yes"))
tmp <- tmp[tmp$Var2==1,]


anno_data <- data.frame("annotation"=c(dem_test, rep_test), 
                        "Var3"=c("Democrat", "Republican"), 
                        "y_position" = c(by(tmp$value, tmp$Var3, max)) + .05,
                        "tip_length"= .1,
                        "xmin"=1, 
                        "xmax"=2)

p3 <- ggplot(tmp,
             aes(x=Var1,  y=value))  + geom_col(position=position_dodge2()) + 
  theme_minimal() + scale_y_continuous("Percent Responded", labels=scales::label_percent(1)) + 
  geom_signif(data=anno_data, 
              aes(annotations=annotation, xmin=xmin, xmax=xmax, 
                  y_position=y_position, tip_length=tip_length), manual=T, 
              inherit.aes = F) + 
  geom_text(aes(label=scales::percent(value)), vjust=0) + 
  facet_wrap(~Var3) + 
  labs(x="Class") + ggtitle("Response Rate by Class") + theme(plot.title = element_text(hjust=.5))

tmp <- reshape2::melt(prop.table(table(all_df$Ideology, all_df$replied,  all_df$party), c(1,3)))

dem_test <- prop.test(table(all_df$Ideology[all_df$party=="Democrat"], all_df$replied[all_df$party=="Democrat"]))$conf.int
dem_test <-round(100*dem_test,2)
dem_test <- paste0("(",dem_test[1], ", ", dem_test[2], ")")

rep_test <- prop.test(table(all_df$Ideology[all_df$party=="Republican"], all_df$replied[all_df$party=="Republican"]))$conf.int
rep_test <-round(100*rep_test,2)
rep_test <- paste0("(",rep_test[1], ", ", rep_test[2], ")")


# tmp$Var2 <- factor(tmp$Var2, levels=c(0, 1), labels=c("No", "Yes"))
tmp <- tmp[tmp$Var2==1,]


anno_data <- data.frame("annotation"=c(dem_test, rep_test), 
                        "Var3"=c("Democrat", "Republican"), 
                        "y_position" = c(by(tmp$value, tmp$Var3, max)) + .05,
                        "tip_length"= .1,
                        "xmin"=1, 
                        "xmax"=2)

p4 <- ggplot(tmp,
             aes(x=Var1,  y=value))  + geom_col(position=position_dodge2()) + 
  theme_minimal() + scale_y_continuous("Percent Responded", labels=scales::label_percent(1)) + 
  geom_signif(data=anno_data, 
              aes(annotations=annotation, xmin=xmin, xmax=xmax, 
                  y_position=y_position, tip_length=tip_length), manual=T, 
              inherit.aes = F) + 
  geom_text(aes(label=scales::percent(value)), vjust=0) + 
  facet_wrap(~Var3) +
  labs(x="Ideology") + ggtitle("Response Rate by Ideology") + theme(plot.title = element_text(hjust=.5))



p <-  grid.arrange(p1, p2)
# ggsave("init_plot.png", plot=p, height=10, width=8)

p <-  grid.arrange(p3, p4)
# ggsave("ideo_class.png", plot=p, height=10, width=8)



tab <- table(all_df$type, all_df$party)
tab <- as.data.frame(prop.table(tab, 2))
tab <- huxtable(dcast(tab, Var1~Var2))
tmp1 <- prop.table(table(all_df$type))
tmp2 <- (table(all_df$party[!is.na(all_df$type)]))
tmp2 <- c(tmp2, sum(tmp2))

tab <- add_columns(tab,after=3, huxtable("Total"=as.vector(tmp1), add_rownames = F)) %>%
  add_rows(after=4, t(huxtable("N"=tmp2))) %>%
  set_number_format(row=2:4, col=2:4, fmt_percent()) %>%
  set_number_format(row=5, col=2:4, fmt_pretty()) %>%
  set_top_border(row=c(2,5), col=1:4, 1) %>%
  huxtable::set_caption("Contact Person Found")

tab[1,1] <- ""


tab2 <- table(all_df$source, all_df$party)
tab2 <- as.data.frame(prop.table(tab2, 2))
tab2 <- huxtable(dcast(tab2, Var1~Var2))
tmp1 <- prop.table(table(all_df$source))
tmp2 <- (table(all_df$party[!is.na(all_df$source)]))
tmp2 <- c(tmp2, sum(tmp2))

tab2 <- add_columns(tab2,after=3, huxtable("Total"=as.vector(tmp1), add_rownames = F)) %>%
  add_rows(after=6, t(huxtable("N"=tmp2))) %>%
  set_number_format(row=2:6, col=2:4, fmt_percent()) %>%
  set_number_format(row=7, col=2:4, fmt_pretty()) %>%
  # set_top_border(row=c(2,6), col=1:4, 1) %>%
  huxtable::set_caption("Email Source")

tab2[1,1] <- ""
tab2 <- tab2[c(1,4,2,3,6,5,7),]
tab2 <- set_top_border(tab2, row=c(2,7), col=1:4, 1)
tab2



# save_as_docx(huxtable::as_flextable(tab), 
#              huxtable::as_flextable(tab2),
#              path="email_descriptives.docx")


mod_df <- na.omit(all_df[,c("replied", "Gender", "Race", "Class_Type", "Ideology",
                            "percent_one_race_black", "latent_estimate", 
                            "hh_income","POPPCT_URBAN","dem_pct",
                            "clinton_pct_2pty", "donated")])




range_val <- function(x) sprintf("(%.2f, %.2f)", min(x), max(x))



cont_tab <- datasummary( latent_estimate +
                           hh_income +  
                           POPPCT_URBAN + 
                           dem_pct  +
                           clinton_pct_2pty + percent_one_race_black ~  Mean + SD  + range_val, 
                         data=mod_df, output = "huxtable")

cont_tab[,1] <- c("Continuous Variables", "Onlne Presence",
                  "Household Income (in $10,000)", 
                  "Proportion Urban",
                  "Democrat Vote Share (County)", 
                  "Democrat Vote Share (State)", 
                  "Percent Black")
cont_tab[1,4] <-"Range"



bin_tab <- datasummary(donated~1 , 
                       data=mod_df, output="huxtable")


bin_tab[,1] <- c("Categorical Variables",
                 "Donated", "")
# bin_tab[6:11,2] <- c("No", "Yes", "No", "Yes", "No", "Yes")
# bin_tab[1,2] <- "Category"
bin_tab[1,2] <- "Count"
bin_tab <- set_number_format(bin_tab, everywhere, 2, "%.f")

bin_tab <- add_columns(bin_tab, huxtable("Proportion"=
                                           as.numeric(unlist(bin_tab[-1,2]))/nrow(mod_df)),
                       after=2)
bin_tab <- add_columns(bin_tab, huxtable("Value"=
                                           c("Yes", "No")),
                       after=1)
bin_tab <- set_number_format(bin_tab, everywhere, 4, "%.2f")

bin_tab <- set_top_border(bin_tab, 1, 2:4, 1)
bin_tab <- set_bottom_border(bin_tab, c(1,3), 2:4, 1)

all_tab <- add_rows(cont_tab, bin_tab)

all_tab <- set_align(all_tab, 1:9, 4, "center")
all_tab <- set_all_padding(all_tab, value = 1)

all_tab



# save_as_docx(huxtable::as_flextable(all_tab), 
#              path="descriptives.docx")


mod_dem <- glm(replied~ Gender + Race + Class_Type+ 
                 Ideology + latent_estimate +
                 hh_income +  percent_one_race_black + 
                 POPPCT_URBAN + 
                 dem_pct  + I(dem_pct^2)  + 
                 clinton_pct_2pty  +  I(clinton_pct_2pty^2)  + 
                 donated  , data=all_df, family = "binomial", 
               subset=party=="Democrat"  )


mod_rep <- glm(replied~ Gender + Race + Class_Type+ 
                 Ideology + latent_estimate +
                 hh_income + percent_one_race_black + 
                 POPPCT_URBAN + 
                 dem_pct  + I(dem_pct^2)  + 
                 clinton_pct_2pty  +  I(clinton_pct_2pty^2)  + 
                 donated  , data=all_df, family = "binomial", 
               subset=party=="Republican" )

summary(mod_dem)
summary(mod_rep)



coef_map <- c("GenderMale"="Sender: Male", 
              "RaceWhite"="Sender: White", 
              "Class_TypeWorking"="Sender: Working Class", 
              "IdeologyModerate"="Sender: Moderate", 
              "latent_estimate"="Online Presence",
              "log(total)"="County Population (Logged)", 
              "hh_income"="Household Income",
              "donatedYes" = "Donated", 
              "POPPCT_URBAN"="Proportion Urban",
              "percent_one_race_black" = "Percent Black",
              "dem_pct" = "Dem Vote (County)", 
              "I(dem_pct^2)"="Dem Vote (County) Squared",
              "clinton_pct_2pty" = "Dem Vote (State)",
              "I(clinton_pct_2pty^2)"="Dem Vote (State) Squared",
              "(Intercept)"="Intercept")


model_tab <-msummary(list("Republican"= mod_rep, 
                          "Democrat"=mod_dem), stars = c("*"=.05, "**"=0.01), 
                     fmt = "%.2f",
                     title="Audit Responsiveness by Party",
                     coef_map=coef_map, output = "huxtable")



model_tab <-  set_align(model_tab, everywhere,2:3, "center") %>%
  set_all_padding(value = 1)

# colnames(model_tab) <- NULL
model_tab
# flextable::save_as_docx(huxtable::as_flextable(model_tab),
#                         path="Main_Results.docx")


tmp_dem <- ggpredict(mod_dem, terms=c("dem_pct [all]"))
ggpredict(mod_dem, terms=c("dem_pct [.25, .5, .75]"))

p1 <- plot(tmp_dem) + labs(y="Probability of Reply", x="Share of Democratic Vote (County)") + 
  scale_x_continuous(labels=scales::label_percent()) + ggtitle("Democratic Parties")

tmp_rep <- ggpredict(mod_rep, terms=c("dem_pct [all]"))
ggpredict(mod_rep, terms=c("dem_pct [.25, .5, .75]"))

# tmp$data$x <- 1-tmp$data$x
p2 <- plot(tmp_rep) + labs(y="Probability of Reply", x="Share of Democratic Vote (County)") + 
  scale_x_continuous(labels=scales::label_percent()) + 
  ggtitle("Republican Parties")

tmp_rep$Party <- "Republican"
tmp_dem$Party <- "Democrat"
tmp <- rbind(tmp_rep, tmp_dem)

p <- grid.arrange(p1, p2)

# ggsave("vote_share_prediction.png",
#        p,
#        height=8, width=6)

p <- ggplot(tmp, aes(x=x, y=predicted, ymin=conf.low, 
                     ymax=conf.high, fill=Party, color=Party)) + 
  geom_ribbon(alpha=.5) + geom_line() +  
  labs(y="Probability of Reply", x="Share of Democratic Vote (County)") + 
  scale_x_continuous(labels=scales::label_percent()) + 
  ggtitle("") + theme_minimal() + 
  theme(legend.position = "bottom") + 
  ggthemes::scale_color_colorblind() + 
  ggthemes::scale_fill_colorblind()

# ggsave("vote_share_prediction_single.png",
#        p,
#        height=5, width=7)

#### Dummying out Stuff ####

all_df$dem_categories <- cut(all_df$dem_pct, 
                             breaks = c(0, .25, .45, .55,  .75, 1), 
                             labels=c("Solid Republican", "Lean Republican", 
                                      "Toss-Up", "Lean Democrat", "Solid Democrat"))
all_df$dem_categories <- relevel(all_df$dem_categories, ref = "Toss-Up")
table(all_df$dem_categories)
mod_dem_dum <- glm(replied~ Gender + Race + Class_Type+ 
                     Ideology + latent_estimate + percent_one_race_black + 
                     hh_income +  
                     POPPCT_URBAN + 
                     dem_categories  + 
                     clinton_pct_2pty  +  I(clinton_pct_2pty^2)  + 
                     donated  , data=all_df, family = "binomial", 
                   subset=party=="Democrat"  )

mod_rep_dum <- glm(replied~ Gender + Race + Class_Type+ 
                     Ideology + latent_estimate +
                     hh_income + percent_one_race_black + 
                     POPPCT_URBAN + 
                     dem_categories + 
                     clinton_pct_2pty  +  I(clinton_pct_2pty^2)  + 
                     donated  , data=all_df, family = "binomial", 
                   subset=party=="Republican" )

summary(mod_dem_dum)
summary(mod_rep_dum)

tmp_dem <- (ggeffect(mod_dem_dum, terms = "dem_categories"))
tmp_rep <- (ggeffect(mod_rep_dum, terms = "dem_categories"))

tmp_rep$Party <- "Republican"
tmp_dem$Party <- "Democrat"
tmp <- rbind(tmp_rep, tmp_dem)

tmp$x <- factor(tmp$x, levels(tmp$x)[c(2,3,1,4,5)])

p <- ggplot(tmp, aes(x=x, y=predicted, ymin=conf.low, 
                     ymax=conf.high, fill=Party, color=Party)) + 
  geom_pointrange(position = position_dodge(.5)) +
  labs(y="Probability of Reply", x="County Type") + 
  # scale_x_continuous(labels=scales::label_percent()) + 
  ggtitle("") + theme_minimal() + 
  theme(legend.position = "bottom") + 
  scale_color_colorblind() + 
  scale_fill_colorblind()

# ggsave("county_type_dummy_regression.png",
#        p,
#        height=5, width=7)


coef_map <- c("GenderMale"="Sender: Male", 
              "RaceWhite"="Sender: White", 
              "Class_TypeWorking"="Sender: Working Class", 
              "IdeologyModerate"="Sender: Moderate", 
              "percent_one_race_black" = "Percent Black",
              "RaceWhite:percent_one_race_black" = "% Black X Sender: White",
              "latent_estimate"="Online Presence",
              "log(total)"="County Population (Logged)", 
              "hh_income"="Household Income",
              "donatedYes" = "Donated", 
              "POPPCT_URBAN"="Proportion Urban",
              "dem_pct" = "Dem Vote (County)", 
              "I(dem_pct^2)"="Dem Vote (County) Squared",
              "dem_categoriesSolid Republican" = "County: Solid GOP", 
              "dem_categoriesLean Republican" = "County: Lean GOP", 
              "dem_categoriesToss-Up" = "County: Toss-Up", 
              "dem_categoriesLean Democrat" = "County: Lean Dem", 
              "dem_categoriesSolid Democrat" = "County: Solid Dem",
              "clinton_pct_2pty" = "Dem Vote (State)",
              "I(clinton_pct_2pty^2)"="Dem Vote (State) Squared",
              "(Intercept)"="Intercept")


model_tab <-msummary(list("Republican"= mod_rep_dum,
                          "Democrat"=mod_dem_dum), stars = c("*"=.05, "**"=0.01), 
                     fmt = "%.2f",
                     title="Audit Responsiveness by Party",
                     coef_map=coef_map, output = "huxtable")

# flextable::save_as_docx(huxtable::as_flextable(model_tab),
#                         path="Dummy_Results.docx")

#### Interactions ######


mod_dem_int <- glm(replied~ Gender + Race*percent_one_race_black + Class_Type+ 
                     Ideology + latent_estimate +
                     hh_income +  
                     POPPCT_URBAN + 
                     dem_pct  + I(dem_pct^2)  + 
                     clinton_pct_2pty  +  I(clinton_pct_2pty^2)  + 
                     donated  , data=all_df, family = "binomial", 
                   subset=party=="Democrat" )


mod_rep_int <- glm(replied~ Gender + Race*percent_one_race_black + Class_Type+ 
                     Ideology + latent_estimate +
                     hh_income +  
                     POPPCT_URBAN + 
                     dem_pct  + I(dem_pct^2)  + 
                     clinton_pct_2pty  +  I(clinton_pct_2pty^2)  + 
                     donated  , data=all_df, family = "binomial", 
                   subset=party=="Republican" )




coef_map <- c("GenderMale"="Sender: Male", 
              "RaceWhite"="Sender: White", 
              "Class_TypeWorking"="Sender: Working Class", 
              "IdeologyModerate"="Sender: Moderate", 
              "percent_one_race_black" = "Percent Black",
              "RaceWhite:percent_one_race_black" = "% Black X Sender: White",
              "latent_estimate"="Online Presence",
              "log(total)"="County Population (Logged)", 
              "hh_income"="Household Income",
              "donatedYes" = "Donated", 
              "POPPCT_URBAN"="Proportion Urban",
              "dem_pct" = "Dem Vote (County)", 
              "I(dem_pct^2)"="Dem Vote (County) Squared",
              "clinton_pct_2pty" = "Dem Vote (State)",
              "I(clinton_pct_2pty^2)"="Dem Vote (State) Squared",
              "(Intercept)"="Intercept")


model_tab <-msummary(list("Republican"= mod_rep_int, 
                          "Democrat"=mod_dem_int), stars = c("*"=.05, "**"=0.01), 
                     fmt = "%.2f",
                     title="Audit Responsiveness by Party",
                     coef_map=coef_map, output = "huxtable")

# flextable::save_as_docx(huxtable::as_flextable(model_tab),
#                         path="Interaction_Results.docx")


tmp_rep <- (ggeffect(mod_rep_int, 
                                terms=c("percent_one_race_black [all]", "Race")))
tmp_dem <- (ggeffect(mod_dem_int, 
                                terms=c("percent_one_race_black [all]", "Race")))

tmp_rep$Party <- "Republican"
tmp_dem$Party <- "Democrat"
tmp <- rbind(tmp_rep, tmp_dem)

p <- ggplot(tmp, aes(x=x, y=predicted, ymin=conf.low, 
                     ymax=conf.high, fill=group, color=group)) +
  facet_wrap(~Party) + 
  geom_ribbon(alpha=.5) + geom_line() +  
  labs(y="Probability of Reply", x="Percent Black") + 
  scale_x_continuous(labels=scales::label_percent()) + 
  ggtitle("") + theme_minimal() + 
  theme(legend.position = "bottom") + 
  ggthemes::scale_color_colorblind("Sender") + 
  ggthemes::scale_fill_colorblind("Sender")



# ggsave("interaction_prediction.png",
#        p,
#        height=6, width=8)
